import sys

# mannwhitneyu is not used below; the import is kept from the original script.
from scipy.stats import mannwhitneyu, wilcoxon  # noqa: F401


def _sum_helitron_values(lines):
    """Sum the third column of helitron rows by category and species.

    Rows are tab-separated. A row is skipped when the raw line starts with
    ``'TE'`` (presumably the header row -- confirm against the input format)
    or when its first field does not contain ``'helitron'``. The last field
    is expected to look like ``<species>.<category>[...]``.

    Args:
        lines: Iterable of text lines (e.g. an open file).

    Returns:
        ``{category: {species: summed_value}}`` in first-seen order.

    Raises:
        ValueError: If a helitron row has fewer than three fields, a last
            field without a ``'.'``, or a non-numeric third field.
    """
    totals = {}
    for line_number, line in enumerate(lines, start=1):
        if line.startswith('TE'):
            continue
        fields = line.strip().split('\t')
        if 'helitron' not in fields[0]:
            continue
        print(fields)
        try:
            species, category = fields[-1].split('.')[:2]
            amount = float(fields[2])
        except (IndexError, ValueError) as err:
            # Report which row is malformed instead of a bare IndexError.
            raise ValueError(
                'malformed helitron row at line {}: {!r}'.format(
                    line_number, line)
            ) from err
        per_species = totals.setdefault(category, {})
        per_species[species] = per_species.get(species, 0) + amount
    return totals


def _write_totals(totals, handle):
    """Write one ``category<TAB>species<TAB>total`` line per entry."""
    for category, per_species in totals.items():
        for species, total in per_species.items():
            handle.write(category + '\t' + species + '\t' + str(total) + '\n')


def _pair_by_species(totals):
    """Return ``{species: [trans_total, syn_total]}`` for every species seen.

    NOTE(review): a species that appears under only one of 'trans'/'syn'
    (or only under some other category) keeps 0 for the missing side and
    still enters the paired test -- confirm this is intended.
    """
    paired = {}
    for category, per_species in totals.items():
        for species, total in per_species.items():
            pair = paired.setdefault(species, [0, 0])
            if category == 'trans':
                pair[0] = total
            elif category == 'syn':
                pair[1] = total
    return paired


def main(argv=None):
    """Summarise helitron totals and run a Wilcoxon signed-rank test.

    Reads the tab-separated file ``argv[1]``, writes the per-category and
    per-species totals to ``argv[2]``, prints each species with its
    ``[trans, syn]`` pair, and prints the p-value of
    ``wilcoxon(trans_values, syn_values)``.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        The p-value reported by ``scipy.stats.wilcoxon``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit('usage: {} INPUT_TSV OUTPUT_TSV'.format(
            argv[0] if argv else 'script'))

    # Context managers guarantee both files are closed (and the output is
    # flushed) even if parsing fails part-way through.
    with open(argv[1], 'r') as source:
        totals = _sum_helitron_values(source)
    with open(argv[2], 'w') as sink:
        _write_totals(totals, sink)

    trans_values = []
    syn_values = []
    for species, pair in _pair_by_species(totals).items():
        print(species, pair)
        trans_values.append(pair[0])
        syn_values.append(pair[1])

    _, p_value = wilcoxon(trans_values, syn_values)
    print(p_value)
    return p_value


if __name__ == '__main__':
    main()